// Session setup for this do-file.
// Start from an empty Stata session.
clear all
// Turn off output pagination; the "permanently" option keeps this setting for future sessions too.
set more off , permanently
// Close a log file if one is open; capture suppresses the error when none is open.
capture log close
// Graph scheme used for any plots. NOTE(review): lean2 does not appear to be a built-in scheme - confirm it is installed.
set scheme lean2
// Raise the matrix size limit. NOTE(review): recent Stata versions may ignore this setting - confirm for the version in use.
set matsize 5000
// Newly created variables are stored as double by default.
set type double

********************************************************************************
***** This file creates local markets in Germany based on driving distance *****
********************************************************************************
	
	
*****************************
****   Prepare data      ****
*****************************	

// Build two brand lookup tables from stations.dta:
//   master_brand.dta : master_id_data -> master_brand
//   using_brand.dta  : using_id_data  -> using_brand
// Both hold the same rows; only the variable names differ so that each file
// can be merged onto one side of a station pair.
	use id_data brand_id using "$dta\stations.dta" , clear
	// Retain brand codes 1 through 25 only (missing codes are removed as well)
	keep if inrange(brand_id, 1, 25)
	rename (id_data brand_id) (master_id_data master_brand)
	save "$dta\master_brand.dta" , replace
	rename (master_id_data master_brand) (using_id_data using_brand)
	save "$dta\using_brand.dta" , replace


// Switch the working directory to the data folder; later steps in this file
// refer to data sets by bare file name and therefore rely on this.
cd "$dta"

// Read only the station identifier and its coordinates
use id_data latitude longitude using stations.dta , clear

// Add a numeric code (station_id) for the string identifier id_data and store
// the identifier, coordinates and code together
encode id_data, generate(station_id)
save "$dta\stations_coordinates_encoded.dta", replace



// Reload the encoded station list and give every variable a master_ prefix,
// so these rows serve as the base stations of the neighbour search.
use "$dta\stations_coordinates_encoded.dta" , clear
rename (id_data latitude longitude station_id) ///
	(master_id_data master_latitude master_longitude master_station_id)

// Pair each base station with all stations within 15 km (geonear distance),
// in long form; the geonear distance is kept as dist_to_center.
geonear master_id_data master_latitude master_longitude using "stations_coordinates_encoded.dta", neighbors(id_data latitude longitude) long within(15)
rename km_to_id_data dist_to_center

// Prepare the driving-distance calculation: attach coordinates for both ends
// of every pair. First the neighbour (using_*) side ...
merge m:1 id_data using "stations_coordinates_encoded.dta", keep(match master) keepusing(latitude longitude) nogenerate
rename (id_data latitude longitude) (using_id_data using_latitude using_longitude)

// ... then the base (master_*) side. The lookup file is keyed on id_data, so
// the base identifier is renamed temporarily for the merge.
rename master_id_data id_data
merge m:1 id_data using "stations_coordinates_encoded.dta", keep(match master) keepusing(latitude longitude) nogenerate
rename (id_data latitude longitude) (master_id_data master_latitude master_longitude)


// Temporary identifier names for the routing step (restored further below)
rename (master_id_data using_id_data) (station_id station_id_2)

// Route between the two coordinate pairs of every row with the Germany OSRM map.
// The variables used below (distance, duration, jumpdist1, jumpdist2,
// return_code) are presumably the outputs of osrmtime - verify against its help file.
osrmtime master_latitude master_longitude using_latitude using_longitude , mapfile("$raw\06_Germany_osrm_map\germany-latest.osrm") nocleanup

// Total station-to-station distance: routed distance plus both jump distances
generate tot_distance_s_s = distance + jumpdist1 + jumpdist2

// The individual routing outputs are not needed afterwards
drop distance duration jumpdist1 jumpdist2 return_code

*append using pepe.dta
capture drop _merge

// Restore the master/using identifier names
rename (station_id station_id_2) (master_id_data using_id_data)

// Store the raw market data (all pairs, before any radius is applied)
compress
cd "$dta"
save station_markets.dta , replace


// Save markets
// For each radius (1, 3, 5, 10) write station_markets_<radius>.dta containing
// the station pairs whose total driving distance is within that radius, with
// the brand of both stations attached. Pairs of two DIFFERENT stations that
// share the same brand are removed from the market.
cd "$dta"
use station_markets.dta , clear
foreach i of numlist 1 3 5 10 {
	preserve
	// Radius is multiplied by 1000 before comparing - presumably
	// tot_distance_s_s is in metres and `i' in km; verify against osrmtime output.
	// Rows with a missing distance fail this condition and are dropped.
	keep if tot_distance_s_s <= `i'*1000
	// Attach brands; stations absent from the brand files keep a missing brand
	merge m:1 master_id_data using "$dta\master_brand.dta", keep(master match) nogenerate
	merge m:1 using_id_data using "$dta\using_brand.dta", keep(master match) nogenerate
	// Drop same-brand pairs. Stata evaluates (. == .) as true, so without the
	// !missing() guard two stations that both lack a brand code would be
	// treated as belonging to the same brand and wrongly removed.
	// NOTE(review): assumes stations without a brand code (outside 1-25) are
	// not meant to count as one common brand - confirm.
	drop if master_brand == using_brand & !missing(master_brand) & master_id_data != using_id_data
*	egen market_id = group(master_id_data)
*	drop master_id_data
	compress
	save station_markets_`i'.dta , replace
	restore
	}
	
	
	
	
*************************************************************
** Create treatment and control group for MTU introduction **
*************************************************************

// Concept: The control group consists of stations that are alone in their market.

** Control groups for the 1, 3, 5 and 10 km markets **
// For each radius, keep the stations whose market contains exactly one row
// (the station itself) and save their identifiers as
// control_group_mtu_intro_<radius>km_market.dta.
// Full variable names are used so the code does not depend on variable
// abbreviation being switched on.
cd "$dta"
foreach radius_km of numlist 1 3 5 10 {
	use station_markets_`radius_km'.dta , clear
	// Number of rows with a non-missing distance per base station
	bysort master_id_data: egen count = count(tot_distance_s_s)
	keep if count == 1
	// The single remaining row of a market must be the station paired with itself
	assert master_id_data == using_id_data
	keep master_id_data
	rename master_id_data id_data
	save control_group_mtu_intro_`radius_km'km_market.dta , replace
}


* Erase intermediate data
// Files are addressed relative to the current working directory.
// Deliberately kept: station_markets_1.dta, station_markets_3.dta,
// station_markets_5.dta (need it for summary stat table) and station_markets.dta.
foreach stem in master_brand using_brand station_markets_10 stations_coordinates_encoded {
	erase `stem'.dta
}